1 Supplementary Figure 1


AI中完成制作






2 Supplementary Figure 2


AI中完成制作






3 Supplementary Figure 3


3.1 FigS 3A

cd /data1/qtl/code_for_submit/data

R
library(data.table)
library(tidyr)
library(dplyr)
library(CMplot)
library(patchwork)
library(magick)
library(pdftools)
library(grid)
library(cowplot)
library(gridExtra)
rm(list = ls())
dat1 <- data.table::fread('338Lung_aqtl_5pca.fdr005.batch.txt.gz')
dat1 <- dat1[,c('variant_id','pval_nominal')]
names(dat1) <- c('MarkerName','pvalue')
dat1$CHR <- sapply(strsplit(dat1$MarkerName,':'),'[',1)
dat1$BP <- sapply(strsplit(dat1$MarkerName,':'),'[',2)
dat_plot1 <- dat1[,c('MarkerName','CHR','BP','pvalue')]
names(dat_plot1) <- c('SNP','Chromosome','Position','apaQTL')
dat_plot1 <- subset(dat_plot1,Chromosome%in%c(1:22))
cols=c("#C65555","#844040")
CMplot::CMplot(dat_plot1, plot.type="m", LOG10=TRUE, threshold.lwd=c(1,1), threshold.col='#BC3C29', main.font=1, ylab=expression(-log[10](italic(P))),
               threshold=NULL, threshold.lty=c(1,2), chr.den.col=NULL, col = cols, cex =c(0.5,0.5,0.5), lab.font=1, main='Manhattan plot of significant apaQTLs', height = 4, width = 8.5,
               amplify= TRUE, signal.cex=0.5, file="pdf", dpi=300, file.output=T, verbose=TRUE, legend.pos='middle')

3.2 FigS 3B

dat1 <- data.table::fread('338Lung_eqtl_5pca.fdr005.batch.txt.gz')
dat1 <- dat1[,c('variant_id','pval_nominal')]
names(dat1) <- c('MarkerName','pvalue')
dat1$CHR <- sapply(strsplit(dat1$MarkerName,':'),'[',1)
dat1$BP <- sapply(strsplit(dat1$MarkerName,':'),'[',2)
dat_plot1 <- dat1[,c('MarkerName','CHR','BP','pvalue')]
names(dat_plot1) <- c('SNP','Chromosome','Position','eQTL')
dat_plot1 <- subset(dat_plot1,Chromosome%in%c(1:22))
cols=c("#5599C9","#3F6585")
CMplot::CMplot(dat_plot1, plot.type="m", LOG10=TRUE, threshold.lwd=c(1,1), threshold.col='#BC3C29', main.font=1, ylab=expression(-log[10](italic(P))),
               threshold=NULL, threshold.lty=c(1,2), chr.den.col=NULL, col = cols, cex =c(0.5,0.5,0.5), lab.font=1, main='Manhattan plot of significant eQTLs', height = 4, width = 8.5,
               amplify= TRUE, signal.cex=0.5, file="pdf", dpi=300, file.output=T, verbose=TRUE, legend.pos='middle')

3.3 FigS 3C

dat1 <- data.table::fread('338Lung_sqtl_5pca.fdr005.batch.txt.gz')
dat1 <- dat1[,c('variant_id','pval_nominal')]
names(dat1) <- c('MarkerName','pvalue')
dat1$CHR <- sapply(strsplit(dat1$MarkerName,':'),'[',1)
dat1$BP <- sapply(strsplit(dat1$MarkerName,':'),'[',2)
dat_plot1 <- dat1[,c('MarkerName','CHR','BP','pvalue')]
names(dat_plot1) <- c('SNP','Chromosome','Position','sQTL')
dat_plot1 <- subset(dat_plot1,Chromosome%in%c(1:22))
cols=c("#C6B855","#846D40")
CMplot::CMplot(dat_plot1, plot.type="m", LOG10=TRUE, threshold.lwd=c(1,1), threshold.col='#BC3C29', main.font=1, ylab=expression(-log[10](italic(P))),
               threshold=NULL, threshold.lty=c(1,2), chr.den.col=NULL, col = cols, cex =c(0.5,0.5,0.5), lab.font=1, main='Manhattan plot of significant sQTLs', height = 4, width = 8.5,
               amplify= TRUE, signal.cex=0.5, file="pdf", dpi=300, file.output=T, verbose=TRUE, legend.pos='middle')


AI中组合图片并修改细节






4 Supplementary Figure 4


4.1 FigS4 A-C

cd /data1/qtl/code_for_submit/data

R
options(stringsAsFactors=F)
library(data.table)
library(ggplot2)
library(tidyr)
library(dplyr)
rm(list=ls())
aqtl <- fread("338Lung_aqtl_5pca.fdr005.batch.txt.gz")
names(aqtl)[1:2] <- c('genes','gene_id')
aqtl <- separate(aqtl,"gene_id",into = c("transcript","gene","chr","direction"),
                 sep = ":", remove = FALSE)
aqtl$pos_ref <- sapply(strsplit(aqtl$variant_id,':'),'[',2)
aqtl$pos_ref <- as.numeric(aqtl$pos_ref)
pas <- fread("Dapars2_res.all_chromosomes.txt")
names(pas)[1] <- "gene_id"
pas <- pas[,c(1,3)]
pas$gene_id <- gsub("|",":",pas$gene_id,fixed = T)
aqtl <- left_join(aqtl,pas,by = "gene_id")
aqtl$distance_1 <- aqtl$pos_ref-aqtl$Predicted_Proximal_APA
p0 <- ggplot(aes(x = distance_1/1e6,y= -log10(pval_nominal)),data = aqtl) + 
  geom_point(color='#BC3C29',alpha = 0.8, size = 0.1) + 
  theme_classic() + 
  ylab(expression(-log[10]*italic("P"))) + xlab('Distance from SNP to PAS(Mb)') +
  ggtitle("") + 
  theme(axis.text.y = element_text(size=12, color = "black"), 
        axis.text.x = element_text(size=12, color = "black"), 
        axis.title.y = element_text(size=14, color = "black"), 
        axis.title.x = element_text(size=12, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5)) +
  theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))
ggsave('FigS4A.pdf',p0, width=4 ,height= 3.5)

rm(list=ls())
gtf <- fread("gene_hg19.csv")
gtf$tss <- ifelse(gtf$direction=='+',gtf$start,gtf$end)
gtf$tes <- ifelse(gtf$direction=='+',gtf$end,gtf$start)
names(gtf) <- paste0(names(gtf),'_gtf')
names(gtf)[5] <- 'gene_id'
eqtl <- fread("338Lung_eqtl_5pca.fdr005.batch.txt.gz")
eqtl <- left_join(eqtl,gtf,by = "gene_id")
eqtl$pos_ref <- as.numeric(sapply(strsplit(eqtl$variant_id,':'),'[',2))
eqtl$distance_1 <- eqtl$pos_ref-eqtl$tss_gtf

p0 <- ggplot(aes(x = (eqtl$distance_1)/1e6,y= -log10(pval_nominal)),data = eqtl) + 
  geom_point(color='#2D417F',alpha = 0.8, size = 0.1) + 
  theme_classic() + 
  ylab(expression(-log[10]*italic("P"))) + xlab('Distance from SNP to TSS(Mb)') +
  ggtitle("") + 
  theme(axis.text.y = element_text(size=12, color = "black"), 
        axis.text.x = element_text(size=12, color = "black"), 
        axis.title.y = element_text(size=14, color = "black"), 
        axis.title.x = element_text(size=12, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5)) +
  theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))
ggsave('FigS4B.pdf',p0, width=4 ,height= 3.5)

rm(list=ls())
sqtl <- fread("338Lung_sqtl_5pca.fdr005.batch.txt.gz")
sqtl$clu_start <- as.integer(sapply(strsplit(sqtl$gene_id,':'),'[',2))
sqtl$clu_end <- as.integer(sapply(strsplit(sqtl$gene_id,':'),'[',3))
sqtl$pos_ref <- sapply(strsplit(sqtl$variant_id,':'),'[',2)
table(is.na(sqtl$pos_ref))
sqtl$pos_ref <- as.integer(sqtl$pos_ref)
sqtl$distance <- apply(sqtl[,c('pos_ref','clu_start','clu_end')],1,
                       function(x){dis <- c( x[1]-x[2],x[1]-x[3] ); 
                       i = which(abs(as.numeric(dis)) == min(abs(as.numeric(dis)))); 
                       dis[i]})
sqtl$distance_1 <- sapply(sqtl$distance,function(x){mean(unlist(x))})
sqtl$distance_1 <- as.numeric(unlist(sqtl$distance_1))
p0 <- ggplot(aes(x = (sqtl$distance_1)/1e6,y= -log10(sqtl$pval_nominal)),data = sqtl) + 
  geom_point(color='#CB812A',alpha = 0.8, size = 0.1) + 
  theme_classic() + 
  ylab(expression(-log[10]*italic("P"))) + xlab('Distance from splice junction(Mb)') +
  ggtitle("") +
  scale_x_continuous(limits = c(-1, 1)) + 
  theme(axis.text.y = element_text(size=12, color = "black"), 
        axis.text.x = element_text(size=12, color = "black"), 
        axis.title.y = element_text(size=14, color = "black"), 
        axis.title.x = element_text(size=12, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5)) +
  theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))
ggsave('FigS4C.pdf',p0, width=4 ,height= 3.5)

4.2 FigS4 D-I

library("plotrix")
sectcol <- c("#173868","#2C73B3","#8A161F","#A23D45","#AF5158","#BB646A","#C7787E","#D38B91","#E09FA3","#F9C7CA")
pieval <- c(95871,27058,21662,23548,389,6415,796,4396,1)
pielabels <- c("Intronic","Intergenic","Upstream","Downstream","Splicing region","3'UTR","5'UTR","Exonic","Other")
percent <- round(pieval/sum(pieval)*100,2)
percent <- paste(percent,"%",sep="")
pielabels <- paste(pielabels,percent,sep=" ")
pdf(file = 'FigS4D.pdf',width = 9,height = 6)
pie3D(pieval,radius=1,height=0.08,labels=pielabels,labelpos=lp,col = sectcol, border = "black",explode=0.25,labelcex=1,theta=pi/3.5,main="aQTL")
dev.off()

sectcol <- c("#173868","#2C73B3","#8A161F","#A23D45","#AF5158","#BB646A","#C7787E","#D38B91","#E09FA3","#F9C7CA")
pieval <- c(1001016,197298,165127,163534,3399,35566,7862,31252,22)
pielabels <- c("Intronic","Intergenic","Upstream","Downstream","Splicing region","3'UTR","5'UTR","Exonic","Other")
percent <- round(pieval/sum(pieval)*100,2)
percent <- paste(percent,"%",sep="")
pielabels <- paste(pielabels,percent,sep=" ")
pdf(file = 'FigS4E.pdf',width = 9,height = 6)
pie3D(pieval,radius=1,height=0.08,labels=pielabels,labelpos=lp,col = sectcol, border = "black",explode=0.25,labelcex=1,theta=pi/3.5,main="eQTL")
dev.off()

sectcol <- c("#173868","#2C73B3","#8A161F","#A23D45","#AF5158","#BB646A","#C7787E","#D38B91","#E09FA3","#F9C7CA")
pieval <- c(496052,81280,80681,81050,2003,16052,3810,16273,10)
pielabels <- c("Intronic","Intergenic","Upstream","Downstream","Splicing region","3'UTR","5'UTR","Exonic","Other")
percent <- round(pieval/sum(pieval)*100,2)
percent <- paste(percent,"%",sep="")
pielabels <- paste(pielabels,percent,sep=" ")
pdf(file = 'FigS4F.pdf',width = 9,height = 6)
pie3D(pieval,radius=1,height=0.08,labels=pielabels,labelpos=lp,col = sectcol, border = "black",explode=0.25,labelcex=1,theta=pi/3.5,main="sQTL")
dev.off()

sectcol <- c("#173868","#2C73B3","#8A161F","#A02A33","#B53E47","#CB535B","#E06870","#F57C84")
pieval <- c(56353,21831,4666,4533,104,1006,170,889)
pielabels <- c("Intronic","Intergenic","Upstream","Downstream","Splicing region","3'UTR","5'UTR","Exonic")
percent <- round(pieval/sum(pieval)*100,2)
percent <- paste(percent,"%",sep="")
pielabels <- paste(pielabels,percent,sep=" ")
lp <- pie3D(pieval,radius=0.8,height=0.2,labels=pielabels,explode=0.1,main="non aQTL")
pdf(file = 'FigS4G.pdf',width = 9,height = 6)
pie3D(pieval,radius=1,height=0.08,labels=pielabels,labelpos=lp,col = sectcol, border = "black",explode=0.25,labelcex=1,theta=pi/3.5,main="aQTL")
dev.off()

sectcol <- c("#173868","#2C73B3","#8A161F","#A02A33","#B53E47","#CB535B","#E06870","#F57C84","#F9C7CA")
pieval <- c(481748,183814,39853,41454,767,8462,1480,8046,5)
pielabels <- c("Intronic","Intergenic","Upstream","Downstream","Splicing region","3'UTR","5'UTR","Exonic","Other")
percent <- round(pieval/sum(pieval)*100,2)
percent <- paste(percent,"%",sep="")
pielabels <- paste(pielabels,percent,sep=" ")
lp <- pie3D(pieval,radius=0.8,height=0.2,labels=pielabels,explode=0.1,main="non eQTL")
pdf(file = 'FigS4H.pdf',width = 9,height = 6)
pie3D(pieval,radius=1,height=0.08,labels=pielabels,labelpos=lp,col = sectcol, border = "black",explode=0.25,labelcex=1,theta=pi/3.5,main="eQTL")
dev.off()

sectcol <- c("#173868","#2C73B3","#8A161F","#A02A33","#B53E47","#CB535B","#E06870","#F57C84","#F9C7CA")
pieval <- c(203545,77679,16722,17738,309,3795,602,3269,5)
pielabels <- c("Intronic","Intergenic","Upstream","Downstream","Splicing region","3'UTR","5'UTR","Exonic","Other")
percent <- round(pieval/sum(pieval)*100,2)
percent <- paste(percent,"%",sep="")
pielabels <- paste(pielabels,percent,sep=" ")
lp <- pie3D(pieval,radius=0.8,height=0.2,labels=pielabels,explode=0.1,main="Lead sQTL")
pdf(file = 'FigS4I.pdf',width = 9,height = 6)
pie3D(pieval,radius=1,height=0.08,labels=pielabels,labelpos=lp,col = sectcol, border = "black",explode=0.25,labelcex=1,theta=pi/3.5,main="sQTL")
dev.off()


AI中组合图片并修改细节






5 Supplementary Figure 5


cd /data1/qtl/code_for_submit/data/FigS5

R
library(data.table)
library(ggplot2)
require(gridExtra)
library(ggpubr)

id_transcript <- 'ENSG00000066084.8'
gene_name <- 'DIP2B'
SNP_name <- 'rs1047912'
exp <- fread('DIP2B_pdui.txt',data.table = F)
exp <- exp[,-1]
exp <- t(exp)
exp <- cbind(IID=rownames(exp),exp)
exp <- as.data.frame(exp)
names(exp)[2] <- 'DIP2B'
query <- fread('../116_geno005_maf001_hwe06.bim_id_query')
cbPalette <- c("#BC3C29","#BC3C29","#BC3C29")
gen <- read.table('338_rs1047912.raw',h=T,check.names = F)
gen <- subset(gen,!is.na(gen[,ncol(gen)]))
names(gen)[7] <- paste0(query$V7[which(query$V2==sapply(strsplit(names(gen)[7],'_'),'[',1))],'_',sapply(strsplit(names(gen)[7],'_'),'[',2))
allele_last <- sapply(strsplit(names(gen)[7],'_'),'[',1)
allele <- c(sapply(strsplit(allele_last,':'),'[',3),sapply(strsplit(allele_last,':'),'[',4))
alt_allele <- paste0(sapply(strsplit(names(gen)[7],'_'),'[',2),sapply(strsplit(names(gen)[7],'_'),'[',2))
ref_allele <- paste0(setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)),setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)))
mix_allele <- paste0(substring(ref_allele,1,1),substring(alt_allele,1,1))
gen$rs_n <- ifelse(gen[,7]==0,paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), ifelse(gen[,7]==2, paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')'),paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')')))
gen$rs_n <- factor(gen$rs_n, levels = c(paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')'), paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')')))
gen <- dplyr::left_join(gen,exp,by = 'IID')
gen[,9] <- as.numeric(gen[,9])
p0 <- ggplot(aes(x=gen$rs_n,y=gen[,ncol(gen)],fill = rs_n),data=gen) + geom_jitter(alpha=0.2,position=position_jitterdodge(jitter.width = 0.35, jitter.height = 0, dodge.width = 0.25))+
  geom_boxplot(alpha=0.4,width=0.2,position=position_dodge(width=0.2),size=0.75,outlier.colour = NA)+geom_violin(alpha=0.2,width=0.7,position=position_dodge(width=0.2),size=0.75)+
  scale_fill_manual(values = cbPalette)+theme_classic() + theme(legend.position="none") + theme(text = element_text(size=16)) + stat_compare_means(aes(group=gen$rs_n),label = "p.format",size=6)+ ylab('Normalized PDUI') + xlab('rs1047912 genotype') + 
  theme(axis.text.y = element_text(size=16, color = "black"), axis.text.x = element_text(size=16, color = "black"), axis.title.y = element_text(size=16, color = "black"), axis.title.x = element_text(size=16, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5,margin = margin(t = 0, r = 0, b = 1.5, l = 0,"cm"))) +theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))+stat_summary(fun.y = "median", geom = "point", size = 1) + stat_summary(fun.y = "median", geom = "line", size = 0.8, aes(group = 1),color="#7E6148B2")
ggsave('FigS5A.pdf',p0, width=5.8 ,height= 5.7)
# 查看QTL的P值和beta值,在对应的图片上使用AI进行修改
zgrep 12:51138862:A/G /data1/qtl/test/338_QTL/338_aQTL/338sample_aqtl.txt.gz | grep ENSG00000066084.8
# ENST00000301180.5|ENSG00000066084.8|chr12|+   12:51138862:A/G 491 96  103 0.152367    1.41335e-16 -0.583168   0.0662792


rm(list = ls())
gene_name <- 'SCCPDH'
SNP_name <- 'rs41310575'
exp <- fread('SCCPDH_pdui.txt',data.table = F)
exp <- exp[,-1]
exp <- t(exp)
exp <- cbind(IID=rownames(exp),exp)
exp <- as.data.frame(exp)
names(exp)[2] <- 'SCCPDH'
query <- fread('../116_geno005_maf001_hwe06.bim_id_query')
cbPalette <- c("#BC3C29","#BC3C29","#BC3C29")
gen <- read.table('338_rs41310575.raw',h=T,check.names = F)
gen <- subset(gen,!is.na(gen[,ncol(gen)]))
names(gen)[7] <- paste0(query$V7[which(query$V2==sapply(strsplit(names(gen)[7],'_'),'[',1))],'_',sapply(strsplit(names(gen)[7],'_'),'[',2))
allele_last <- sapply(strsplit(names(gen)[7],'_'),'[',1)
allele <- c(sapply(strsplit(allele_last,':'),'[',3),sapply(strsplit(allele_last,':'),'[',4))
alt_allele <- paste0(sapply(strsplit(names(gen)[7],'_'),'[',2),sapply(strsplit(names(gen)[7],'_'),'[',2))
ref_allele <- paste0(setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)),setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)))
mix_allele <- paste0(substring(ref_allele,1,1),substring(alt_allele,1,1))
gen$rs_n <- ifelse(gen[,7]==0,paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), ifelse(gen[,7]==2, paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')'),paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')')))
gen$rs_n <- factor(gen$rs_n, levels = c(paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')'), paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')')))
gen <- dplyr::left_join(gen,exp,by = 'IID')
gen[,9] <- as.numeric(gen[,9])
p0 <- ggplot(aes(x=gen$rs_n,y=gen[,ncol(gen)],fill = rs_n),data=gen) + 
  geom_jitter(alpha=0.2,position=position_jitterdodge(jitter.width = 0.35, jitter.height = 0, dodge.width = 0.25))+
  geom_boxplot(alpha=0.4,width=0.2,position=position_dodge(width=0.2),size=0.75,outlier.colour = NA)+
  geom_violin(alpha=0.2,width=0.7,position=position_dodge(width=0.2),size=0.75)+
  scale_fill_manual(values = cbPalette)+theme_classic() + theme(legend.position="none") + theme(text = element_text(size=16)) + 
  stat_compare_means(aes(group=gen$rs_n),label = "p.format",size=6)+ylab('Normalized PDUI') + xlab('rs41310575 genotype') + 
  theme(axis.text.y = element_text(size=16, color = "black"), axis.text.x = element_text(size=16, color = "black"), axis.title.y = element_text(size=16, color = "black"), axis.title.x = element_text(size=16, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5,margin = margin(t = 0, r = 0, b = 1.5, l = 0,"cm"))) +theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))+stat_summary(fun.y = "median", geom = "point", size = 1) + stat_summary(fun.y = "median", geom = "line", size = 0.8, aes(group = 1),color="#7E6148B2")
ggsave('FigS5B.pdf',p0 width=5.8 ,height= 5.7)
# 查看QTL的P值和beta值,在对应的图片上使用AI进行修改
zgrep 1:246931119:A/G /data1/qtl/test/338_QTL/338_aQTL/338sample_aqtl.txt.gz | grep ENSG00000143653.9
# ENST00000366510.3|ENSG00000143653.9|chr1|+    1:246931119:A/G 621 90  94  0.139053    6.88606e-20 -0.800303   0.0811891


AI中组合图片并修改细节






6 Supplementary Figure 6


R
GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_NSCLC_updated_0916.TBL',
                          qtl_file = '338Lung_aqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'aQTL_NSCLC',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#844040"))

GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_AD_updated_0916.TBL',
                          qtl_file = '338Lung_aqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'aQTL_LUAD',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#844040"))

GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_SC_updated_0916.TBL',
                          qtl_file = '338Lung_aqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'aQTL_LUSC',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#844040"))


AI中组合图片并修改细节






7 Supplementary Figure 7


R
GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_NSCLC_updated_0916.TBL',
                          qtl_file = '338Lung_eqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'eQTL_NSCLC',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#3F6585"))

GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_AD_updated_0916.TBL',
                          qtl_file = '338Lung_eqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'eQTL_LUAD',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#3F6585"))

GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_SC_updated_0916.TBL',
                          qtl_file = '338Lung_eqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'eQTL_LUSC',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#3F6585"))


AI中组合图片并修改细节






8 Supplementary Figure 8


R
GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_NSCLC_updated_0916.TBL',
                          qtl_file = '338Lung_sqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'sQTL_NSCLC',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#E18227"))

GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_AD_updated_0916.TBL',
                          qtl_file = '338Lung_sqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'sQTL_LUAD',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#E18227"))

GenoViewX::Mplot_for_QTLs(file_path = './',
                          file_name = '3_China_pop_SC_updated_0916.TBL',
                          qtl_file = '338Lung_sqtl_5pca.fdr005.batch.txt.gz',
                          figure_pathy = './',
                          figure_name = 'sQTL_LUSC',
                          p_threshold =5e-04,
                          p_col = 10,
                          snp_col = 1,
                          colour = c("#E18227"))


AI中组合图片并修改细节






9 Supplementary Figure 9


cd /data1/qtl/code_for_submit/data/FigS9 

R
rm(list = ls())
library(ggplot2)
library(data.table)
library(patchwork)
twas <- read.csv('TWAS_wgt_res.csv')
p1 <- ggplot(aes(x = subset(twas,model=='top1')$hsq, y = subset(twas,model=='top1')$rsq), data = subset(twas,model=='top1')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#002277', alpha = 0.85) + 
  xlab('h2 of genes') + 
  ylab('R2 of Top1 model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(twas,model=='top1')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(twas,model=='top1')$hsq),2)), size = 5) 
p2 <- ggplot(aes(x = subset(twas,model=='blup')$hsq, y = subset(twas,model=='blup')$rsq), data = subset(twas,model=='blup')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#002277', alpha = 0.85) + 
  xlab('h2 of genes') + 
  ylab('R2 of BLUP model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(twas,model=='blup')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(twas,model=='blup')$hsq),2)), size = 5) 
p3 <- ggplot(aes(x = subset(twas,model=='lasso')$hsq, y = subset(twas,model=='lasso')$rsq), data = subset(twas,model=='lasso')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#002277', alpha = 0.85) + 
  xlab('h2 of genes') + 
  ylab('R2 of LASSO model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(twas,model=='lasso')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(twas,model=='lasso')$hsq),2)), size = 5) 
p4 <- ggplot(aes(x = subset(twas,model=='enet')$hsq, y = subset(twas,model=='enet')$rsq), data = subset(twas,model=='enet')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#002277', alpha = 0.85) + 
  xlab('h2 of genes') + 
  ylab('R2 of Elastic-net model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(twas,model=='enet')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(twas,model=='enet')$hsq),2)), size = 5) 
p <- p1 + p2 + p3 +p4 + plot_layout(nrow = 4, heights = c(1.1, 1, 1, 1.2))


apatwas <- read.csv('apaTWAS_wgt_res.csv')
p11 <- ggplot(aes(x = subset(apatwas,model=='top1')$hsq, y = subset(apatwas,model=='top1')$rsq), data = subset(apatwas,model=='top1')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#800A00', alpha = 0.85) + 
  xlab('h2 of APA envents') + 
  ylab('R2 of Top1 model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(apatwas,model=='top1')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(apatwas,model=='top1')$hsq),2)), size = 5) 
p22 <- ggplot(aes(x = subset(apatwas,model=='blup')$hsq, y = subset(apatwas,model=='blup')$rsq), data = subset(apatwas,model=='blup')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#800A00', alpha = 0.85) + 
  xlab('h2 of APA envents') + 
  ylab('R2 of BLUP model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(apatwas,model=='blup')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(apatwas,model=='blup')$hsq),2)), size = 5) 
p33 <- ggplot(aes(x = subset(apatwas,model=='lasso')$hsq, y = subset(apatwas,model=='lasso')$rsq), data = subset(apatwas,model=='lasso')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#800A00', alpha = 0.85) + 
  xlab('h2 of APA envents') + 
  ylab('R2 of LASSO model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(apatwas,model=='lasso')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(apatwas,model=='lasso')$hsq),2)), size = 5) 
p44 <- ggplot(aes(x = subset(apatwas,model=='enet')$hsq, y = subset(apatwas,model=='enet')$rsq), data = subset(apatwas,model=='enet')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#800A00', alpha = 0.85) + 
  xlab('h2 of APA envents') + 
  ylab('R2 of Elastic-net model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(apatwas,model=='enet')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(apatwas,model=='enet')$hsq),2)), size = 5) 
pp <- p11 + p22 + p33 +p44 + plot_layout(nrow = 4, heights = c(1.1, 1, 1, 1.2))


sptwas <- read.csv('spTWAS_wgt_res.csv')
p111 <- ggplot(aes(x = subset(sptwas,model=='top1')$hsq, y = subset(sptwas,model=='top1')$rsq), data = subset(sptwas,model=='top1')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#A3590F', alpha = 0.85) + 
  xlab('h2 of AS events') + 
  ylab('R2 of Top1 model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(sptwas,model=='top1')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(sptwas,model=='top1')$hsq),2)), size = 5) 
p222 <- ggplot(aes(x = subset(sptwas,model=='blup')$hsq, y = subset(sptwas,model=='blup')$rsq), data = subset(sptwas,model=='blup')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#A3590F', alpha = 0.85) + 
  xlab('h2 of AS events') + 
  ylab('R2 of BLUP model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(sptwas,model=='blup')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(sptwas,model=='blup')$hsq),2)), size = 5) 
p333 <- ggplot(aes(x = subset(sptwas,model=='lasso')$hsq, y = subset(sptwas,model=='lasso')$rsq), data = subset(sptwas,model=='lasso')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#A3590F', alpha = 0.85) + 
  xlab('h2 of AS events') + 
  ylab('R2 of LASSO model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(sptwas,model=='lasso')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(sptwas,model=='lasso')$hsq),2)), size = 5) 
p444 <- ggplot(aes(x = subset(sptwas,model=='enet')$hsq, y = subset(sptwas,model=='enet')$rsq), data = subset(sptwas,model=='enet')) + 
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 2) +
  geom_point(size = 3, color = '#A3590F', alpha = 0.85) + 
  xlab('h2 of AS events') + 
  ylab('R2 of Elastic-net model') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  xlim(0, 1) + ylim(0, 1) +
  annotate("text", x = 0.2, y = 0.95, label = paste0('Mean R2 = ', round(mean(subset(sptwas,model=='enet')$rsq,na.rm = T),2)), size = 5) +
  annotate("text", x = 0.2, y = 0.85, label = paste0('Mean h2 = ', round(mean(subset(sptwas,model=='enet')$hsq),2)), size = 5) 
ppp <- p111 + p222 + p333 +p444 + plot_layout(nrow = 4, heights = c(1.1, 1, 1, 1.2))
p_out <- p1 + p11 + p111 + p2 + p22 + p222 + p3+ p33 + p333 + p4 + p44 + p444 + plot_layout(ncol = 3, widths = c(1,1,1))
ggsave('FigS9.pdf',p_out,width=13.5 ,height= 15.5)


AI中组合图片并修改细节






10 Supplementary Figure 10


cd /data1/qtl/code_for_submit/data/FigS10

R
type <- 'aqtl'
list <- c('NSCLC','LUAD','LUSC')
for (i in 1:length(list)) {
  GenoViewX::Mplot_for_TWAS(paste0('./',type),paste0(list[i],'_China_apa_twasplot_update'),paste0('apaTWAS Manhattan Plot of ',list[i]),0.1,paste0('./',type),11)
}


type <- 'eqtl'
list <- c('NSCLC','LUAD','LUSC')
for (i in 1:length(list)) {
  GenoViewX::Mplot_for_TWAS(paste0('./',type),paste0(list[i],'_TWAS_twasplot_update'),paste0('TWAS Manhattan Plot of ',list[i]),0.1,paste0('./',type),12)
}

type <- 'sqtl'
list <- c('NSCLC','LUAD','LUSC')
for (i in 1:length(list)) {
  GenoViewX::Mplot_for_TWAS(paste0('./',type),paste0(list[i],'_China_Intron_twasplot_update'),paste0('spTWAS Manhattan Plot of ',list[i]),0.1,paste0('./',type),14)
}


AI中组合图片并修改细节






11 Supplementary Figure 11


cd /data1/qtl/code_for_submit/data/FigS10

R
rm(list = ls())
library(data.table)
library(ggplot2)
library(reshape2)
library(dplyr)
library(stringr)
library(forcats)
library(patchwork)

file_path <- './'
eColoc <- read.csv(paste0(file_path,'eQTL_coloc_res_PP4_05.csv'))
aColoc <- read.csv(paste0(file_path,'aQTL_coloc_res_PP4_05.csv'))
sColoc <- read.csv(paste0(file_path,'sQTL_coloc_res_PP4_05.csv'))
eGene <- unique(eColoc$gene_name)
aGene <- unique(aColoc$gene_name)
sGene <- unique(sColoc$gene_name)
eColoc_all <- read.csv(paste0(file_path,'eQTL_coloc_res_all.csv'))
aColoc_all <- read.csv(paste0(file_path,'aQTL_coloc_res_all.csv'))
sColoc_all <- read.csv(paste0(file_path,'sQTL_coloc_res_all.csv'))
eColoc_use <- subset(eColoc_all,gene_name%in%eGene)
aColoc_use <- subset(aColoc_all,gene_name%in%aGene)
aColoc_use1 <- subset(aColoc_use,type=='NSCLC')[order(subset(aColoc_use,type=='NSCLC')$PP.H4.abf,decreasing = T),]
aColoc_use1 <- subset(aColoc_use1,!duplicated(aColoc_use1$gene_name))
aColoc_use2 <- subset(aColoc_use,type=='LUAD')[order(subset(aColoc_use,type=='LUAD')$PP.H4.abf,decreasing = T),]
aColoc_use2 <- subset(aColoc_use2,!duplicated(aColoc_use2$gene_name))
aColoc_use3 <- subset(aColoc_use,type=='LUSC')[order(subset(aColoc_use,type=='LUSC')$PP.H4.abf,decreasing = T),]
aColoc_use3 <- subset(aColoc_use3,!duplicated(aColoc_use3$gene_name))
aColoc_use <- rbind(aColoc_use1,aColoc_use2,aColoc_use3)
sColoc_use <- subset(sColoc_all,gene_name%in%sGene)
sColoc_use1 <- subset(sColoc_use,type=='NSCLC')[order(subset(sColoc_use,type=='NSCLC')$PP.H4.abf,decreasing = T),]
sColoc_use1 <- subset(sColoc_use1,!duplicated(sColoc_use1$gene_name))
sColoc_use2 <- subset(sColoc_use,type=='LUAD')[order(subset(sColoc_use,type=='LUAD')$PP.H4.abf,decreasing = T),]
sColoc_use2 <- subset(sColoc_use2,!duplicated(sColoc_use2$gene_name))
sColoc_use3 <- subset(sColoc_use,type=='LUSC')[order(subset(sColoc_use,type=='LUSC')$PP.H4.abf,decreasing = T),]
sColoc_use3 <- subset(sColoc_use3,!duplicated(sColoc_use3$gene_name))
sColoc_use <- rbind(sColoc_use1,sColoc_use2,sColoc_use3)
eColoc_use <- eColoc_use[,c('type','gene_name','PP.H4.abf')]
aColoc_use <- aColoc_use[,c('type','gene_name','PP.H4.abf')]
sColoc_use <- sColoc_use[,c('type','gene_name','PP.H4.abf')]
eColoc_use$QTL <- 'eQTL'
aColoc_use$QTL <- 'aQTL'
sColoc_use$QTL <- 'sQTL'
dat <- rbind(eColoc_use,aColoc_use,sColoc_use)
dat$type <- factor(dat$type,levels = c('NSCLC','LUAD','LUSC'))
dat$PP.H4.abf <- as.numeric(dat$PP.H4.abf)
cytoband <- read.csv('coloc_cytoband_res.csv')
cytoband <- cytoband[,c('V1','V2','V3','V5','V9')]
names(cytoband) <- c('chr','start','end','gene_name','cyto')
dat <- dplyr::left_join(dat,cytoband,by = 'gene_name')
dat$gene_name_cyto <- paste0(dat$gene_name,'(',dat$cyto,')')
dat1 <- subset(dat,QTL=='eQTL')
dat1 <- dat1 %>%
  mutate(
    chr_num = as.numeric(str_extract(chr, "\\d+")),
    arm = ifelse(str_detect(cyto, "p"), "p", "q"),
    band = as.numeric(str_extract(cyto, "(?<=p|q)\\d+(\\.\\d+)?")))
dat1 <- dat1 %>%
  arrange(chr_num, arm, band, gene_name)
dat1 <- dat1 %>%
  mutate(gene_name_cyto = factor(gene_name_cyto, levels = rev(unique(gene_name_cyto))))
p1 <- ggplot(dat1, aes(x = type, y = gene_name_cyto, fill = PP.H4.abf)) +
  geom_tile(color = "white", width = 0.95, height = 0.95) + 
  scale_fill_gradient2(
    low = "#4979B6",  
    mid = "#FEF9B6",   
    high = "#D73027",
    midpoint = 0.5,     
    name = "Coloc\nPP.H4.abf",
    limits = c(0, 1)) +
  geom_text(data = subset(dat1,PP.H4.abf>0.5), 
            aes(label = "*"), color = "black", size = 5) +
  labs(x = "",y = "",title = "") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black", hjust = 0.5),
    panel.grid = element_blank(), 
    legend.position = "right",   
    legend.title = element_text(size = 12, color = "black"),
    legend.text = element_text(size = 10)) +
  coord_fixed() 

dat1 <- subset(dat,QTL=='aQTL')
dat1 <- dat1 %>%
  mutate(
    chr_num = as.numeric(str_extract(chr, "\\d+")),
    arm = ifelse(str_detect(cyto, "p"), "p", "q"),
    band = as.numeric(str_extract(cyto, "(?<=p|q)\\d+(\\.\\d+)?")))
dat1 <- dat1 %>%
  arrange(chr_num, arm, band, gene_name)
dat1 <- dat1 %>%
  mutate(gene_name_cyto = factor(gene_name_cyto, levels = rev(unique(gene_name_cyto))))
p2 <- ggplot(dat1, aes(x = type, y = gene_name_cyto, fill = PP.H4.abf)) +
  geom_tile(color = "white", width = 0.95, height = 0.95) + 
  scale_fill_gradient2(
    low = "#4979B6", 
    mid = "#FEF9B6",    
    high = "#D73027", 
    midpoint = 0.5,    
    name = "Coloc\nPP.H4.abf",
    limits = c(0, 1)) +
  geom_text(data = subset(dat1,PP.H4.abf>0.5), 
            aes(label = "*"), color = "black", size = 5) +
  labs(x = "",y = "",title = "") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black", hjust = 0.5),
    panel.grid = element_blank(), 
    legend.position = "right",    
    legend.title = element_text(size = 12, color = "black"),
    legend.text = element_text(size = 10)) +
  coord_fixed() 

dat1 <- subset(dat,QTL=='sQTL')
dat1 <- dat1 %>%
  mutate(
    chr_num = as.numeric(str_extract(chr, "\\d+")),
    arm = ifelse(str_detect(cyto, "p"), "p", "q"),
    band = as.numeric(str_extract(cyto, "(?<=p|q)\\d+(\\.\\d+)?")))
dat1 <- dat1 %>%
  arrange(chr_num, arm, band, gene_name)
dat1 <- dat1 %>%
  mutate(gene_name_cyto = factor(gene_name_cyto, levels = rev(unique(gene_name_cyto))))

p3 <- ggplot(dat1, aes(x = type, y = gene_name_cyto, fill = PP.H4.abf)) +
  geom_tile(color = "white", width = 0.95, height = 0.95) + 
  scale_fill_gradient2(
    low = "#4979B6",  
    mid = "#FEF9B6",   
    high = "#D73027",
    midpoint = 0.5,    
    name = "Coloc\nPP.H4.abf",
    limits = c(0, 1)) +
  geom_text(data = subset(dat1,PP.H4.abf>0.5), 
            aes(label = "*"), color = "black", size = 5) +
  labs(x = "",y = "",title = "") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black", hjust = 0.5),
    panel.grid = element_blank(), 
    legend.position = "right",   
    legend.title = element_text(size = 12, color = "black"),
    legend.text = element_text(size = 10)) +
  coord_fixed() 



file_path <- './'
eColoc <- read.csv(paste0(file_path,'eqtl_SMR_res_FDR01_HEIDI005.csv'))
aColoc <- read.csv(paste0(file_path,'aqtl_SMR_res_FDR01_HEIDI005.csv'))
sColoc <- read.csv(paste0(file_path,'sqtl_SMR_res_FDR01_HEIDI005.csv'))
eGene <- unique(eColoc$Gene)
aGene <- unique(aColoc$Gene)
sGene <- unique(sColoc$Gene)
eColoc_all <- read.csv(paste0(file_path,'eqtl_SMR_res_all.csv'))
aColoc_all <- read.csv(paste0(file_path,'aqtl_SMR_res_all.csv'))
sColoc_all <- read.csv(paste0(file_path,'sqtl_SMR_res_all.csv'))
eColoc_use <- subset(eColoc_all,Gene%in%eGene)
aColoc_use <- subset(aColoc_all,Gene%in%aGene)
aColoc_use1 <- subset(aColoc_use,type=='NSCLC')[order(subset(aColoc_use,type=='NSCLC')$p_SMR),]
aColoc_use1 <- subset(aColoc_use1,!duplicated(aColoc_use1$Gene))
aColoc_use2 <- subset(aColoc_use,type=='LUAD')[order(subset(aColoc_use,type=='LUAD')$p_SMR),]
aColoc_use2 <- subset(aColoc_use2,!duplicated(aColoc_use2$Gene))
aColoc_use3 <- subset(aColoc_use,type=='LUSC')[order(subset(aColoc_use,type=='LUSC')$p_SMR),]
aColoc_use3 <- subset(aColoc_use3,!duplicated(aColoc_use3$Gene))
aColoc_use <- rbind(aColoc_use1,aColoc_use2,aColoc_use3)
sColoc_use <- subset(sColoc_all,Gene%in%sGene)
sColoc_use1 <- subset(sColoc_use,type=='NSCLC')[order(subset(sColoc_use,type=='NSCLC')$p_SMR),]
sColoc_use1 <- subset(sColoc_use1,!duplicated(sColoc_use1$Gene))
sColoc_use2 <- subset(sColoc_use,type=='LUAD')[order(subset(sColoc_use,type=='LUAD')$p_SMR),]
sColoc_use2 <- subset(sColoc_use2,!duplicated(sColoc_use2$Gene))
sColoc_use3 <- subset(sColoc_use,type=='LUSC')[order(subset(sColoc_use,type=='LUSC')$p_SMR),]
sColoc_use3 <- subset(sColoc_use3,!duplicated(sColoc_use3$Gene))
sColoc_use <- rbind(sColoc_use1,sColoc_use2,sColoc_use3)
eColoc_use <- eColoc_use[,c('type','Gene','p_SMR','p_SMR_FDR','p_HEIDI')]
aColoc_use <- aColoc_use[,c('type','Gene','p_SMR','p_SMR_FDR','p_HEIDI')]
sColoc_use <- sColoc_use[,c('type','Gene','p_SMR','p_SMR_FDR','p_HEIDI')]
eColoc_use$QTL <- 'eQTL'
aColoc_use$QTL <- 'aQTL'
sColoc_use$QTL <- 'sQTL'
dat <- rbind(eColoc_use,aColoc_use,sColoc_use)
dat$type <- factor(dat$type,levels = c('NSCLC','LUAD','LUSC'))
dat$p_SMR <- as.numeric(dat$p_SMR)
names(dat)[2] <- 'gene_name'
cytoband <- read.csv('smr_cytoband_res.csv')
cytoband <- cytoband[,c('V1','V2','V3','V5','V9')]
names(cytoband) <- c('chr','start','end','gene_name','cyto')
dat <- dplyr::left_join(dat,cytoband,by = 'gene_name')
dat$gene_name_cyto <- paste0(dat$gene_name,'(',dat$cyto,')')

dat1 <- subset(dat,QTL=='eQTL')
dat1 <- dat1 %>%
  mutate(
    chr_num = as.numeric(str_extract(chr, "\\d+")),
    arm = ifelse(str_detect(cyto, "p"), "p", "q"),
    band = as.numeric(str_extract(cyto, "(?<=p|q)\\d+(\\.\\d+)?")))
dat1 <- dat1 %>%
  arrange(chr_num, arm, band, gene_name)
dat1 <- dat1 %>%
  mutate(gene_name_cyto = factor(gene_name_cyto, levels = rev(unique(gene_name_cyto))))

p4 <- ggplot(dat1, aes(x = type, y = gene_name_cyto, fill = -log10(p_SMR))) +
  geom_tile(color = "white", width = 0.95, height = 0.95) + 
  scale_fill_gradient2(
    low = "#1a9850", 
    mid = "#FEF9B6",  
    high = "#D73027",
    midpoint = 1.30103,    
    name = "SMR\n-log10(p_SMR)",
    limits = c(0, 8)) +
  geom_text(data = subset(dat1,p_SMR_FDR<0.1& p_HEIDI>0.05), 
            aes(label = "*"), color = "black", size = 5) +
  labs(x = "",y = "",title = "") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black", hjust = 0.5),
    panel.grid = element_blank(), 
    legend.position = "right",   
    legend.title = element_text(size = 12, color = "black"),
    legend.text = element_text(size = 10)) +
  coord_fixed() 

dat1 <- subset(dat,QTL=='aQTL')
dat1 <- dat1 %>%
  mutate(
    chr_num = as.numeric(str_extract(chr, "\\d+")),
    arm = ifelse(str_detect(cyto, "p"), "p", "q"),
    band = as.numeric(str_extract(cyto, "(?<=p|q)\\d+(\\.\\d+)?")))
dat1 <- dat1 %>%
  arrange(chr_num, arm, band, gene_name)
dat1 <- dat1 %>%
  mutate(gene_name_cyto = factor(gene_name_cyto, levels = rev(unique(gene_name_cyto))))

p5 <- ggplot(dat1, aes(x = type, y = gene_name_cyto, fill = -log10(p_SMR))) +
  geom_tile(color = "white", width = 0.95, height = 0.95) + 
  scale_fill_gradient2(
    low = "#1a9850", 
    mid = "#FEF9B6",   
    high = "#D73027",
    midpoint = 1.30103,    
    name = "SMR\n-log10(p_SMR)",
    limits = c(0, 8)) +
  geom_text(data = subset(dat1,p_SMR_FDR<0.1& p_HEIDI>0.05), 
            aes(label = "*"), color = "black", size = 5) +
  labs(x = "",y = "",title = "") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black", hjust = 0.5),
    panel.grid = element_blank(), # 去掉网格线
    legend.position = "right",    # 图例放置右侧
    legend.title = element_text(size = 12, color = "black"),
    legend.text = element_text(size = 10)) +
  coord_fixed() 

dat1 <- subset(dat,QTL=='sQTL')
dat1 <- dat1 %>%
  mutate(
    chr_num = as.numeric(str_extract(chr, "\\d+")),
    arm = ifelse(str_detect(cyto, "p"), "p", "q"),
    band = as.numeric(str_extract(cyto, "(?<=p|q)\\d+(\\.\\d+)?")))
dat1 <- dat1 %>%
  arrange(chr_num, arm, band, gene_name)
dat1 <- dat1 %>%
  mutate(gene_name_cyto = factor(gene_name_cyto, levels = rev(unique(gene_name_cyto))))

p6 <- ggplot(dat1, aes(x = type, y = gene_name_cyto, fill = -log10(p_SMR))) +
  geom_tile(color = "white", width = 0.95, height = 0.95) + 
  scale_fill_gradient2(
    low = "#1a9850",  
    mid = "#FEF9B6",   
    high = "#D73027", 
    midpoint = 1.30103,    
    name = "SMR\n-log10(p_SMR)",
    limits = c(0, 8)) +
  geom_text(data = subset(dat1,p_SMR_FDR<0.1& p_HEIDI>0.05), 
            aes(label = "*"), color = "black", size = 5) +
  labs(x = "",y = "",title = "") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, size = 10, color = "black"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black", hjust = 0.5),
    panel.grid = element_blank(), # 去掉网格线
    legend.position = "right",    # 图例放置右侧
    legend.title = element_text(size = 12, color = "black"),
    legend.text = element_text(size = 10)) +
  coord_fixed() 

combined_plot <- (p1 + (p2 / p3) ) + 
  plot_layout(widths = c(1, 1), guides = "collect") & 
  theme(legend.position = "right")  
combined_plot1 <- ( (p4 / p5 / p6) ) + 
  plot_layout(widths = c(1, 1), guides = "collect") & 
  theme(legend.position = "right")
final_plot <- combined_plot | combined_plot1  
ggsave("FigS11.pdf", plot = final_plot, width = 18, height = 12)


AI中组合图片并修改细节






12 Supplementary Figure 12


AI中完成制作






13 Supplementary Figure 13


Rscript ./run_susie.R \
--phenotype_list ainput/qtl_permuted.tsv.gz \
--cisdistance 1000000 \
--chunk "1 1" \
--covariates ainput/pdui.peer.covariates.txt \
--genotype_matrix ainput/qtl_dose.tsv.gz \
--expression_matrix ainput/expression.tsv \
--sample_meta ainput/sample_metadata.tsv \
--phenotype_meta ainput/phenotype_metadata.tsv \
--out_prefix ./finemapping_output \
--eqtlutils ./eQTLUtils/ \
--qtl_group qtl \
--write_full_susie true

Rscript ./run_susie.R \
--phenotype_list ./einput/qtl_permuted.tsv.gz \
--cisdistance 1000000 \
--chunk "1 1" \
--covariates ./einput/338LungCombine.batch.combined_covariates_5pca.txt \
--genotype_matrix ./einput/qtl_dose.tsv.gz \
--expression_matrix ./einput/expression.tsv \
--sample_meta ./einput/sample_metadata.tsv \
--phenotype_meta ./einput/phenotype_metadata.tsv \
--out_prefix ./finemapping_output \
--eqtlutils ./eQTLUtils/ \
--qtl_group qtl \
--write_full_susie true

Rscript ./run_susie.R \
--phenotype_list ./sinput/qtl_permuted.tsv.gz \
--cisdistance 1000000 \
--chunk "1 1" \
--covariates ./sinput/338LungCombine.combined_covariates_5pca.txt \
--genotype_matrix ./sinput/qtl_dose.tsv.gz \
--expression_matrix ./sinput/expression.tsv \
--sample_meta ./sinput/sample_metadata.tsv \
--phenotype_meta ./sinput/phenotype_metadata.tsv \
--out_prefix ./finemapping_output \
--eqtlutils ./eQTLUtils/ \
--qtl_group qtl \
--write_full_susie true


AI中组合图片并修改细节






14 Supplementary Figure 14


14.1 FigS14A

<http://krainer01.cshl.edu/cgi-bin/tools/ESE3/esefinder.cgi>
  
rs1665258 
>Ref
GCCCAAGTTGCTGCAGATCCTGTCACCACGAGTGGACACCCTCTGGCCACCATAGCAAA

>Alt
GCCCAAGTTGCTGCAGATCCTGTCACCACAAGTGGACACCCTCTGGCCACCATAGCAAA

14.2 FigS14B

cd /data1/qtl/code_for_submit/data/FigS14

R
rm(list = ls())
library(data.table)
library(ggplot2)
require(gridExtra)
library(ggpubr)

gene_name <- 'KIAA1841'
SNP_name <- 'rs1665258'
exp <- fread('psi_KIAA1841_exp.txt',data.table = F)
exp <- exp[,-1]
exp <- t(exp)
exp <- cbind(IID=rownames(exp),exp)
exp <- as.data.frame(exp)
names(exp)[2] <- 'KIAA1841'
query <- fread('../116_geno005_maf001_hwe06.bim_id_query')
head(query)
cbPalette <- c("#FF9900","#FF9900","#FF9900")
gen <- read.table('338_rs1665258.raw',h=T,check.names = F)
gen <- subset(gen,!is.na(gen[,ncol(gen)]))
names(gen)[7] <- paste0(query$V7[which(query$V2==sapply(strsplit(names(gen)[7],'_'),'[',1))],'_',sapply(strsplit(names(gen)[7],'_'),'[',2))
allele_last <- sapply(strsplit(names(gen)[7],'_'),'[',1)
allele <- c(sapply(strsplit(allele_last,':'),'[',3),sapply(strsplit(allele_last,':'),'[',4))
alt_allele <- paste0(sapply(strsplit(names(gen)[7],'_'),'[',2),sapply(strsplit(names(gen)[7],'_'),'[',2))
ref_allele <- paste0(setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)),setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)))
mix_allele <- paste0(substring(ref_allele,1,1),substring(alt_allele,1,1))
gen$rs_n <- ifelse(gen[,7]==0,paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), ifelse(gen[,7]==2, paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')'),paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')')))
gen$rs_n <- factor(gen$rs_n, levels = c(paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')'), paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')')))
gen <- dplyr::left_join(gen,exp,by = 'IID')
gen[,9] <- as.numeric(gen[,9])
p0 <- ggplot(aes(x=gen$rs_n,y=gen[,ncol(gen)],fill = rs_n),data=gen) + 
  geom_jitter(alpha=0.2,position=position_jitterdodge(jitter.width = 0.35, 
                                                      jitter.height = 0, 
                                                      dodge.width = 0.25))+
  geom_boxplot(alpha=0.4,width=0.2,
               position=position_dodge(width=0.2),
               size=0.75,outlier.colour = NA)+
  geom_violin(alpha=0.2,width=0.7,
              position=position_dodge(width=0.2),
              size=0.75)+
  scale_fill_manual(values = cbPalette)+
  theme_classic() + 
  theme(legend.position="none") + 
  theme(text = element_text(size=16)) + 
  stat_compare_means(aes(group=gen$rs_n),label = "p.format",size=6)+
  ylab('Normalized intron usage') + xlab('rs1665258 genotype') + 
  theme(axis.text.y = element_text(size=16, color = "black"), 
        axis.text.x = element_text(size=16, color = "black"), 
        axis.title.y = element_text(size=16, color = "black"), 
        axis.title.x = element_text(size=16, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5,margin = margin(t = 0, r = 0, b = 1.5, l = 0,"cm"))) +
  theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))+
  stat_summary(fun.y = "median", geom = "point", size = 1) + 
  stat_summary(fun.y = "median", geom = "line", size = 0.8, aes(group = 1),color="#7E6148B2")
ggsave('FigS14B',p0, width=5.2 ,height= 3.7)
# 查看QTL的P值和beta值,在对应的图片上使用AI进行修改
zgrep 2:61385100:A/G /data1/qtl/test/338_QTL/338_sQTL/338sample_sqtl.txt.gz | grep ENSG00000162929.9
# 2:61372331:61384997:clu_27445_+:ENSG00000162929.9 2:61385100:A/G  92094   141 160 0.236686    2.47485e-29 -1.16879    0.0924398

14.3 FigS14C

cd /data1/qtl/code_for_submit/data/FigS14

R
rm(list = ls())
library(data.table)
library(ggplot2)
require(gridExtra)
library(ggpubr)
gene_name <- 'KIAA1841'
SNP_name <- 'rs1665258'
exp <- fread('KIAA1841_transcript_exp.txt',data.table = F)
exp <- exp[,-1]
exp <- t(exp)
exp <- cbind(IID=rownames(exp),exp)
exp <- as.data.frame(exp)
names(exp)[2] <- 'KIAA1841'
query <- fread('../116_geno005_maf001_hwe06.bim_id_query')
cbPalette <- c("#BC3C29","#3F76B4","#FF9900")
gen <- read.table('338_rs1665258.raw',h=T,check.names = F)
gen <- subset(gen,!is.na(gen[,ncol(gen)]))
names(gen)[7] <- paste0(query$V7[which(query$V2==sapply(strsplit(names(gen)[7],'_'),'[',1))],'_',sapply(strsplit(names(gen)[7],'_'),'[',2))
allele_last <- sapply(strsplit(names(gen)[7],'_'),'[',1)
allele <- c(sapply(strsplit(allele_last,':'),'[',3),sapply(strsplit(allele_last,':'),'[',4))
alt_allele <- paste0(sapply(strsplit(names(gen)[7],'_'),'[',2),sapply(strsplit(names(gen)[7],'_'),'[',2))
ref_allele <- paste0(setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)),setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)))
mix_allele <- paste0(substring(ref_allele,1,1),substring(alt_allele,1,1))
gen$rs_n <- ifelse(gen[,7]==0,paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), ifelse(gen[,7]==2, paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')'),paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')')))
gen$rs_n <- factor(gen$rs_n, levels = c(paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')'), paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')')))
gen <- dplyr::left_join(gen,exp,by = 'IID')
gen[,9] <- as.numeric(gen[,9])
p0 <- ggplot(aes(x=gen$rs_n,y=gen[,ncol(gen)],fill = rs_n),data=gen) + 
  geom_jitter(alpha=0.2,position=position_jitterdodge(jitter.width = 0.35, 
                                                      jitter.height = 0, 
                                                      dodge.width = 0.25))+
  geom_boxplot(alpha=0.4,width=0.2,
               position=position_dodge(width=0.2),
               size=0.75,outlier.colour = NA)+
  geom_violin(alpha=0.2,width=0.7,
              position=position_dodge(width=0.2),
              size=0.75)+
  scale_fill_manual(values = cbPalette)+
  theme_classic() + 
  theme(legend.position="none") + 
  theme(text = element_text(size=16)) + 
  stat_compare_means(aes(group=gen$rs_n),label = "p.format",size=6)+
  ylab('Transcript expression') + xlab('rs1665258 genotype')  + 
  theme(axis.text.y = element_text(size=16, color = "black"), 
        axis.text.x = element_text(size=16, color = "black"), 
        axis.title.y = element_text(size=16, color = "black"), 
        axis.title.x = element_text(size=16, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5,margin = margin(t = 0, r = 0, b = 1.5, l = 0,"cm"))) +
  theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))+
  stat_summary(fun.y = "median", geom = "point", size = 1) + 
  stat_summary(fun.y = "median", geom = "line", size = 0.8, aes(group = 1),color="#7E6148B2")
ggsave('FigS14C.pdf',p0, width=5.8 ,height= 3.2)
# 查看QTL的P值和beta值,在对应的图片上使用AI进行修改
zgrep 2:61385100:A/G /data1/qtl/3QTL/338/qtls/transcript_qtl/new/338sample_trans_qtl.txt.gz | grep ENSG00000162929.9_ENST00000398622.2
# ENSG00000162929.9_ENST00000398622.2   2:61385100:A/G  12834   141 160 0.236686    0.122834    -0.119136   0.0769799

14.4 FigS14D

cd /data1/qtl/code_for_submit/data/FigS14

R
rm(list = ls())
library(data.table)
library(forestploter)

dat <- fread('query',h =F)
names(dat) <- c("MarkerName",   "Allele1"   ,"Allele2", "Freq1",    "FreqSE",   "MinFreq",  "MaxFreq",  "Effect",   "StdErr",   "P-value",  "Direction",    "HetISq",   "HetChiSq"  ,"HetDf",   "HetPVal",  "N")
dat$type <- c('NSCLC','LUAD','LUSC')
dat <- dat[,c('Effect','StdErr','P-value','type')]
names(dat) <- c('beta','se','P','type')
dat$OR <- exp(dat$beta)
dat_up_se <- dat$beta+1.96*dat$se
dat_low_se <- dat$beta-1.96*dat$se
dat$up_95_enrichment <- exp(dat_up_se)
dat$low_95_enrichment <- exp(dat_low_se)
dat$OR <- round(dat$OR,2)
dat$up_95_enrichment <- round(dat$up_95_enrichment,2)
dat$low_95_enrichment <- round(dat$low_95_enrichment,2)
dat$'OR(95%CI)' <- paste0(dat$OR," (",dat$low_95_enrichment,"-",dat$up_95_enrichment,")",sep=" ")
multi1 <- as.data.frame(dat[,c(4,5,8,3,6,7)]) 
names(multi1)[c(1,2,3,4)] <- c('Phenotype','OR','OR \n(95%CI)','P value')
multi1$" " <- paste(rep("   ",8),collapse = " ")
p <- forest(multi1[,c(1,7,3,4)], base_size = 10,est = multi1$OR,lower = multi1$low_95_enrichment,upper = multi1$up_95_enrichment,
            ci_lty = 1,ci_lwd = 1.8,ci_col = "#762a83",ci_Theight = 0.3,ci_column = 2,ci_pch= 18,ref_line = 1,xlim = c(0.9,1.2),ticks_at = c(0.9,1,1.1))
g2 <- add_border(p,part = "header",where="bottom")
g2 <- add_border(g2,part = "header",where="top")
g2 <- add_border(g2,row=3,where="bottom")
pdf(file = 'FigS14D.pdf',width = 6,height = 6)
g2
dev.off()

14.5 FigS14E

cd /data1/qtl/code_for_submit/data

R
library(data.table)
library(karyoploteR)
library(BiocFileCache)

library(rtracklayer)
library(GenomicFeatures)
rm(list = ls())
snp <- data.frame(chr='chr2',start=61385099,end=61385100,rsID='rs1665258')
ld_range <- toGRanges(snp)
TP53.region <- toGRanges("chr2:61385000-61395000") 
glycine <- makeTxDbFromGFF("gencode.v19.annotation.gff3")
kp <- plotKaryotype(zoom = TP53.region)
genes.data <- makeGenesDataFromTxDb(glycine,
                                    karyoplot=kp,
                                    plot.transcripts = TRUE, 
                                    plot.transcripts.structure = TRUE)
genes.data <- mergeTranscripts(genes.data)
annotation.marks <- data.frame( path = c("wgEncodeBroadHistoneA549H3k04me3Etoh02Sig.bigWig","wgEncodeBroadHistoneA549H3k04me1Etoh02Sig.bigWig",  "wgEncodeBroadHistoneA549H3k27acEtoh02Sig.bigWig", "wgEncodeBroadHistoneA549H3k09acEtoh02Sig.bigWig"),signal = c( "H3K4me3 (ENCODE)", "H3K4me1 (ENCODE)", "H3K27ac (ENCODE)", "H3K9ac (ENCODE)"))
set.seed(22)
pdf("FigS14E.pdf", width = 11.5, height = 8)
pp <- getDefaultPlotParams(plot.type = 1)
pp$leftmargin <- 0.15
pp$topmargin <- 5
pp$bottommargin <- 15
pp$ideogramheight <- 5
pp$data1inmargin <- 10
kp <- plotKaryotype(zoom = TP53.region, cex=0.7, plot.params = pp)
kpAddBaseNumbers(kp, tick.dist = 5000, minor.tick.dist = 1000,add.units = TRUE, cex = 0.7, digits = 6)
kpPlotGenes(  kp, data = genes.data,  r0 = 0, r1 = 0.1, mark.height=0.5,mark.width=1,non.coding.exons.height=0.5, gene.name.cex = 0.6,  gene.name.position = "right")
color <- c('#F79014','#D84035','#4596CE','#4eb755','purple')
total.tracks <- nrow(annotation.marks) + 1.2
out.at <- autotrack(1:nrow(annotation.marks), total.tracks, margin = 0.1, r0 = 0.3)
for (i in seq_len(nrow(annotation.marks))) {
  bigwig.file <- annotation.marks$path[i]
  at <- autotrack(i, nrow(annotation.marks), r0 = out.at$r0, r1 = out.at$r1, margin = 0.3)
  kp <- kpPlotBigWig(
    kp, data = bigwig.file, ymax = "visible.region",
    r0 = at$r0, r1 = at$r1,
    col = color[i], border = NA)  
  computed.ymax <- ceiling(kp$latest.plot$computed.values$ymax)
  kpAxis(kp, ymin = 0, ymax = computed.ymax, numticks = 2, r0 = at$r0, r1 = at$r1, cex = 0.7)
  kpAddLabels(
    kp, labels = annotation.marks$signal[i],
    r0 = at$r0, r1 = at$r1,
    cex = 0.7, label.margin = 0.035)}
at <- autotrack(total.tracks, total.tracks, margin = 0.1, r0 = 0.18)
kp <- kpPoints(
  kp, data = ld_range,col='red',
  r0 = at$r0, r1 = at$r1,cex = 0.5,
  y = sample(seq(0.1, 1, 0.1), 1, replace=T))
dev.off()

14.6 FigS14F

14.7 top

cd /data1/qtl/code_for_submit/data/FigS14

R
options(stringsAsFactors=F)
library(data.table)
library(limma)
library(ggpubr)
library(patchwork) 
library(ggsci) 
library(tidyverse)
library(dplyr)
rm(list=ls())
dat_gene <- read.table('KIAA1841_NSCLC_NJLCC.txt',row.names = 1,sep = '\t',check.names = F,header = T)
dat_gene <- as.data.frame(dat_gene)
dat_gene$type <- c(rep('Normal\n(N = 116)',116),rep('Tumor\n(N = 116)',116))
dat_gene[,1] <- log2(dat_gene[,1]+1)
colnames(dat_gene)=c("expression","Type")
dat_gene$Type=factor(dat_gene$Type)
dat_gene$gene_name <- 'KIAA1841'
custom_palette <- c("#0072B5", "#BC3C29")  
p <- ggboxplot(dat_gene, x = "Type", y = "expression",color = "Type", palette = custom_palette,short.panel.labs = FALSE) +
  labs( x = NULL,  y = 'Expression of KIAA1841') +
  theme(axis.text.y = element_text(size = 16), 
        axis.text.x = element_text(size = 16), 
        axis.title = element_text(size = 18), 
        strip.text = element_text(size = 20), 
        strip.background = element_rect(color = "black", fill = "lightgray", size = 1.5), 
        strip.border = element_rect(color = "black", size = 2), 
        panel.grid.major = element_blank(),  
        panel.grid.minor = element_blank(), 
        panel.border = element_rect(color = "black", fill = NA, size = 1.5),  
        legend.text = element_text(size = 16), 
        legend.title = element_text(size = 16)  ) +
  geom_boxplot(aes(color = Type), size = 1.5) +  
  geom_jitter(aes(color = Type), size = 5, width = 0.2, shape = 16,alpha = 0.5) + 
  stat_compare_means(method = "t.test",label = "p.format",size = 7, paired = TRUE) 
ggsave(p,filename ='Figure 14F-top.pdf',width = 6.5,height = 7)

14.8 bottom

cd /data1/qtl/code_for_submit/data/FigS14

R
rm(list = ls())
library(data.table)
library(limma)
library(ggpubr)
library(patchwork) 
library(ggsci) 
dat_gene <- read.table('KIAA1841_NSCLC.txt',row.names = 1,sep = '\t',check.names = F,header = T)
type <- as.character(sapply(strsplit(rownames(dat_gene),"\\-"), "[", 4))
type <- sapply(strsplit(type,""), "[", 1)
type=gsub("2", "1", type)
type <- as.numeric(type)
dat_gene <- cbind(dat_gene,type=type)
dat_gene <- as.data.frame(dat_gene)
table(dat_gene$type)
dat_gene$type <- ifelse(dat_gene$type==0,'Tumor\n(N = 1037)','Normal\n(N = 108)')
dat_gene[,1] <- log2(dat_gene[,1]+1)
colnames(dat_gene)=c("expression","Type")
dat_gene$Type=factor(dat_gene$Type)
dat_gene$gene_name <- 'KIAA1841'
custom_palette <- c("#0072B5", "#BC3C29")  
p <- ggboxplot(dat_gene, x = "Type", y = "expression",
               color = "Type", palette = custom_palette, 
               short.panel.labs = FALSE) +
  labs(x = NULL, y = 'Expression of KIAA1841') +
  theme(axis.text.y = element_text(size = 16), 
        axis.text.x = element_text(size = 16), 
        axis.title = element_text(size = 18),
        strip.text = element_text(size = 20), 
        strip.background = element_rect(color = "black", fill = "lightgray", size = 1.5),  
        strip.border = element_rect(color = "black", size = 2),  
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),  
        panel.border = element_rect(color = "black", fill = NA, size = 1.5),  
        legend.text = element_text(size = 16),  
        legend.title = element_text(size = 16)) +
  geom_boxplot(aes(color = Type), size = 1.5) +  
  geom_jitter(aes(color = Type), size = 5, width = 0.2, shape = 16,alpha = 0.5) + 
  stat_compare_means(label = "p.format",size = 7,method = 'wilcox.test', digits = 3)
ggsave(p,filename = 'Figure 14F-bottom,.pdf',width = 6.5,height = 7)


AI中组合图片并修改细节






15 Supplementary Figure 15


15.1 FigS15A-top

cd /data1/qtl/code_for_submit/data/FigS15

R
rm(list = ls())
library(data.table)
library(ggplot2)
conditional_res <- fread('NSCLC.conditional.chr1.loc_19.GWAS.condition.intron.csv')
conditional_res <- subset(conditional_res,!is.na(pv))
conditional_res$POS <- as.integer(sapply(strsplit(conditional_res$snp,':'),'[',2))
utr <- fread('../3UTR_location.338.txt')
start <- utr[which(utr$gene_id=='ENST00000413518.1:ENSG00000142396.6:chr19:+'),]$start
end <- utr[which(utr$gene_id=='ENST00000413518.1:ENSG00000142396.6:chr19:+'),]$end
min.limit <- mean(c(as.numeric(start),as.numeric(end))) - 300000
max.limit <- mean(c(as.numeric(start),as.numeric(end))) + 300000
conditional_res <- subset(conditional_res, (POS > min.limit) & (POS < max.limit) & !is.na(pv))
head(conditional_res)
rawgwas <- conditional_res[,c('POS','pv')]
condgwas <- conditional_res[,c('POS','cond.pv')]
colnames(condgwas) <- c('POS','pv')
rawgwas$note <- 'Main GWAS'
condgwas$note <- paste0('Conditioned on\npredictor effect')
dat1 <- rbind(rawgwas,condgwas)
dat1 <- subset(dat1,!is.na(pv))
len <- max(-log10(dat1$pv))
len <- ifelse(len<5,5,len)

pdf(file = 'Figure S15A-top.pdf',width=6.89, height=2.63)
ggplot(data=dat1, aes(x=POS/1e6, y=-log10(pv))) + 
  geom_point(aes(color = note), pch=16, size=2.25) + theme_classic() +
  scale_color_manual(values = rev(c("gray","#2D417F")),name = '')   +           
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  xlab('Chr19 physical position (Mb)') +
  ylab(expression(-log[10]*italic("P"))) +scale_y_continuous(breaks = seq(0,len,len%/%5),labels = seq(0,len,len%/%5), expand=c(0.05, 0)) + scale_x_continuous(expand = c(0, 0)) +
  theme(axis.text.y = element_text(size=15, color = "black"), axis.text.x = element_text(size=15, color = "black"), axis.title.y = element_text(size=15, color = "black"), axis.title.x = element_text(size=15, color = "black")) +
  theme(axis.line = element_line(colour = 'black', size = 0.3))+ geom_point(data = subset(dat1, POS == 58827717)[1,], aes(x=POS/1e6, y=-log10(pv)), color = "#7F00BA", pch=16, size=2.25) + theme(legend.key.size = unit(0.5, 'cm')) + 
  geom_text(data = subset(dat1,POS==58827717)[1,],aes(x = POS/1e6+0.05 , y = -log10(pv), label = 'rs28695411')) +theme(legend.position = c(0.15, 0.8),legend.title = element_blank(),legend.text=element_text(size=12),legend.background = element_rect(fill=F,linetype="solid", size=0.1,colour ="black"))
dev.off()

15.2 FigS15A-bottom

dats <- read.table('chr19_condition.top-QTL.txt',h=T)
dats$pos <- as.numeric(sapply(strsplit(dats$SNP,':'),'[',2))
dat <- subset(dats, (pos > min.limit) & (pos < max.limit) )
rawgwas <- dat[,c('pos','p')]
condgwas <- dat[,c('pos','pC')]
colnames(condgwas) <- colnames(rawgwas)
rawgwas$note <- 'Main eQTL'
condgwas$note <- 'Conditioned on\ntop eQTL'
dat1 <- rbind(rawgwas,condgwas)
dat1 <- subset(dat1,!is.na(p))
len <- max(-log10(na.omit(dat1$p)))
len <- ifelse(len<5,5,len)

pdf(file = 'Figure 15A-bottom',width=6.89, height=2.63)
ggplot(data=dat1, aes(x=pos/1e6, y=-log10(p))) + 
  geom_point(aes(color = note), pch=16, size=2.25) + theme_classic() +
  scale_color_manual(values = rev(c("gray","#2D417F")),name = '') +         
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  xlab('Chr19 physical position (Mb)') +
  ylab(expression(-log[10]*italic("P"))) + scale_y_continuous(breaks = seq(0,len,len%/%5),labels = seq(0,len,len%/%5), expand=c(0.05, 0)) + scale_x_continuous(expand = c(0, 0)) +
  theme(axis.text.y = element_text(size=15, color = "black"), axis.text.x = element_text(size=15, color = "black"), axis.title.y = element_text(size=15, color = "black"), axis.title.x = element_text(size=15, color = "black")) +
  theme(axis.line = element_line(colour = 'black', size = 0.3)) + theme(legend.key.size = unit(0.5, 'cm')) + geom_point(data = subset(dat1, pos == 58827717)[1,], aes(x=pos/1e6, y=-log10(p)), color = "#7F00BA", pch=16, size=2.25) + theme(legend.key.size = unit(0.5, 'cm')) + 
  geom_text(data = subset(dat1,pos==58827717)[1,],aes(x = pos/1e6+0.05 , y = -log10(p), label = 'rs28695411')) +theme(legend.position = c(0.15, 0.8),legend.title = element_blank(),legend.text=element_text(size=12),legend.background = element_rect(fill=F,linetype="solid", size=0.1,colour ="black"))
dev.off()   

15.3 FigS15B-top

cd /data1/qtl/code_for_submit/data/FigS15

R
rm(list = ls())
library(data.table)
library(forestploter)
dat <- fread('query_rs3206946',h =F)
names(dat) <- c("MarkerName",   "Allele1"   ,"Allele2", "Freq1",    "FreqSE",   "MinFreq",  "MaxFreq",  "Effect",   "StdErr",   "P-value",  "Direction",    "HetISq",   "HetChiSq"  ,"HetDf",   "HetPVal",  "N")
dat$type <- c('NSCLC','LUAD','LUSC')
dat <- dat[,c('Effect','StdErr','P-value','type')]
names(dat) <- c('beta','se','P','type')
dat$OR <- exp(dat$beta)
dat_up_se <- dat$beta+1.96*dat$se
dat_low_se <- dat$beta-1.96*dat$se
dat$up_95_enrichment <- exp(dat_up_se)
dat$low_95_enrichment <- exp(dat_low_se)
dat$OR <- round(dat$OR,2)
dat$up_95_enrichment <- round(dat$up_95_enrichment,2)
dat$low_95_enrichment <- round(dat$low_95_enrichment,2)
dat$'OR(95%CI)' <- paste0(dat$OR," (",dat$low_95_enrichment,"-",dat$up_95_enrichment,")",sep=" ")
multi1 <- as.data.frame(dat[,c(4,5,8,3,6,7)]) 
names(multi1)[c(1,2,3,4)] <- c('Phenotype','OR','OR \n(95%CI)','P value')
multi1$" " <- paste(rep("   ",8),collapse = " ")
p <- forest(multi1[,c(1,7,3,4)], base_size = 10,est = multi1$OR,lower = multi1$low_95_enrichment,upper = multi1$up_95_enrichment,
            ci_lty = 1,ci_lwd = 1.8,ci_col = "#762a83",ci_Theight = 0.3,ci_column = 2,ci_pch= 18,ref_line = 1,xlim = c(0.9,1.2),ticks_at = c(0.9,1,1.1))
g2 <- add_border(p,part = "header",where="bottom")
g2 <- add_border(g2,part = "header",where="top")
g2 <- add_border(g2,row=3,where="bottom")
pdf(file = 'FigS15B-top.pdf',width = 6,height = 6)
g2
dev.off()

15.4 FigS15B-bottom

dat <- fread('query_rs3206947',h =F)
names(dat) <- c("MarkerName",   "Allele1"   ,"Allele2", "Freq1",    "FreqSE",   "MinFreq",  "MaxFreq",  "Effect",   "StdErr",   "P-value",  "Direction",    "HetISq",   "HetChiSq"  ,"HetDf",   "HetPVal",  "N")
dat$type <- c('NSCLC','LUAD','LUSC')
dat <- dat[,c('Effect','StdErr','P-value','type')]
names(dat) <- c('beta','se','P','type')
dat$OR <- exp(dat$beta)
dat_up_se <- dat$beta+1.96*dat$se
dat_low_se <- dat$beta-1.96*dat$se
dat$up_95_enrichment <- exp(dat_up_se)
dat$low_95_enrichment <- exp(dat_low_se)
dat$OR <- round(dat$OR,2)
dat$up_95_enrichment <- round(dat$up_95_enrichment,2)
dat$low_95_enrichment <- round(dat$low_95_enrichment,2)
dat$'OR(95%CI)' <- paste0(dat$OR," (",dat$low_95_enrichment,"-",dat$up_95_enrichment,")",sep=" ")
multi1 <- as.data.frame(dat[,c(4,5,8,3,6,7)]) 
names(multi1)[c(1,2,3,4)] <- c('Phenotype','OR','OR \n(95%CI)','P value')
multi1$" " <- paste(rep("   ",8),collapse = " ")
p <- forest(multi1[,c(1,7,3,4)], base_size = 10,est = multi1$OR,lower = multi1$low_95_enrichment,upper = multi1$up_95_enrichment,
            ci_lty = 1,ci_lwd = 1.8,ci_col = "#762a83",ci_Theight = 0.3,ci_column = 2,ci_pch= 18,ref_line = 1,xlim = c(0.9,1.2),ticks_at = c(0.9,1,1.1))
g2 <- add_border(p,part = "header",where="bottom")
g2 <- add_border(g2,part = "header",where="top")
g2 <- add_border(g2,row=3,where="bottom")
pdf(file = 'FigS15B-top.pdf',width = 6,height = 6)
g2
dev.off()

15.5 FigS15C

cd /data1/qtl/code_for_submit/data

R
rm(list = ls())
library(data.table)
library(ggplot2)
exp <- fread('338combineTPM.expression.bed.gz')
pdui <- fread('338sample_Lung.apa.bed.gz')
exp <- subset(exp,gene_id=='ENSG00000142396.6')
exp1 <- exp[,-c(1:4)]
pdui <- subset(pdui,grepl('ENSG00000142396.6',pdui$Gene))
pdui1 <- pdui[,-c(1:4)]
exp1 <- t(exp1)
exp_use <- data.frame(ID = rownames(exp1),exp = exp1[,1])
pdui1 <- t(pdui1)
pdui_use <- data.frame(ID = rownames(pdui1),pdui = pdui1[,1])
dat <- merge(exp_use,pdui_use,by = 'ID')
test <- cor.test(as.numeric(dat$exp),as.numeric(dat$pdui),method='spearman')
test
p <- ggplot(aes(x = exp, y = pdui), data = dat) + 
  geom_point(size = 1.5, color = '#2D417F', alpha = 0.7) + 
  xlab('Nomalized PDUI of ERVK3-1') + 
  ylab('Nomalized gene expression\nof ERVK3-1') + 
  theme_bw() +
  theme(axis.text = element_text(size = 14, color = 'black'),
        axis.title = element_text(size = 14),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        axis.line = element_line(colour = "black", size = 0.5, linetype = 1),
        axis.ticks.length = unit(7, "pt"),
        axis.ticks = element_line(colour = "black")) + 
  geom_vline(xintercept = 0, linetype = "dashed", color = 'grey') + 
  geom_hline(yintercept = 0, linetype = "dashed", color = 'grey') + 
  xlim(-2.5, 2.5) + ylim(-2.5, 2.5) +
  annotate("text", x = 0, y = 2.4, label = paste0('r = ', round(test$estimate, 3)), 
           size = 5) +
  geom_smooth(method = lm, formula = y ~ x, colour = "black", se = FALSE, lwd = 1)
ggsave('FigS15C.pdf',p,width=5.5 ,height= 3.5)

15.6 FigS15D

cd /data1/qtl/code_for_submit/data/FigS15

R
rm(list = ls())
library(data.table)
library(ggplot2)
require(gridExtra)
library(ggpubr)
exp <- fread('ERVK3-1_exp.txt',data.table = F)
exp <- exp[,-1]
exp <- t(exp)
exp <- cbind(IID=rownames(exp),exp)
exp <- as.data.frame(exp)
names(exp)[2] <- 'ERVK3-1'
query <- fread('../116_geno005_maf001_hwe06.bim_id_query')
head(query)
cbPalette <- c("#0072B5","#0072B5","#0072B5")
gen <- read.table('338_rs3206946.raw',h=T,check.names = F)
gen <- subset(gen,!is.na(gen[,ncol(gen)]))
names(gen)[7] <- paste0(query$V7[which(query$V2==sapply(strsplit(names(gen)[7],'_'),'[',1))],'_',sapply(strsplit(names(gen)[7],'_'),'[',2))
allele_last <- sapply(strsplit(names(gen)[7],'_'),'[',1)
allele <- c(sapply(strsplit(allele_last,':'),'[',3),sapply(strsplit(allele_last,':'),'[',4))
alt_allele <- paste0(sapply(strsplit(names(gen)[7],'_'),'[',2),sapply(strsplit(names(gen)[7],'_'),'[',2))
ref_allele <- paste0(setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)),setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)))
mix_allele <- paste0(substring(ref_allele,1,1),substring(alt_allele,1,1))
gen$rs_n <- ifelse(gen[,7]==0,paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), ifelse(gen[,7]==2, paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')'),paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')')))
gen$rs_n <- factor(gen$rs_n, levels = c(paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')'), paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')')))
gen <- dplyr::left_join(gen,exp,by = 'IID')
gen[,9] <- as.numeric(gen[,9])
p <- ggplot(aes(x=gen$rs_n,y=gen[,ncol(gen)],fill = rs_n),data=gen) + 
  geom_jitter(alpha=0.2,position=position_jitterdodge(jitter.width = 0.35, jitter.height = 0, dodge.width = 0.25))+
  geom_boxplot(alpha=0.4,width=0.2,position=position_dodge(width=0.2),size=0.75,outlier.colour = NA)+
  geom_violin(alpha=0.2,width=0.7,position=position_dodge(width=0.2),size=0.75)+
  scale_fill_manual(values = cbPalette)+
  theme_classic() + 
  theme(legend.position="none") + 
  theme(text = element_text(size=16)) + 
  ylab('Normalized gene expression') + xlab('rs3206946 genotype') + 
  theme(axis.text.y = element_text(size=16, color = "black"), 
        axis.text.x = element_text(size=16, color = "black"), 
        axis.title.y = element_text(size=16, color = "black"), 
        axis.title.x = element_text(size=16, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5,margin = margin(t = 0, r = 0, b = 1.5, l = 0,"cm"))) +
  theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))+
  stat_summary(fun.y = "median", geom = "point", size = 1) + 
  stat_summary(fun.y = "median", geom = "line", size = 0.8, aes(group = 1),color="#7E6148B2") +labs(title = 'ERVK3-1')
ggsave('Figure 15D',p, width=5.8 ,height= 5.7)

15.7 FigS15E

cd /data1/qtl/code_for_submit/data/FigS15

R
rm(list = ls())
library(data.table)
library(ggplot2)
require(gridExtra)
library(ggpubr)
exp <- fread('ERVK3-1_exp.txt',data.table = F)
exp <- exp[,-1]
exp <- t(exp)
exp <- cbind(IID=rownames(exp),exp)
exp <- as.data.frame(exp)
names(exp)[2] <- 'ERVK3-1'
query <- fread('../116_geno005_maf001_hwe06.bim_id_query')
head(query)
cbPalette <- c("#0072B5","#0072B5","#0072B5")
gen <- read.table('338_rs3206947.raw',h=T,check.names = F)
gen <- subset(gen,!is.na(gen[,ncol(gen)]))
names(gen)[7] <- paste0(query$V7[which(query$V2==sapply(strsplit(names(gen)[7],'_'),'[',1))],'_',sapply(strsplit(names(gen)[7],'_'),'[',2))
allele_last <- sapply(strsplit(names(gen)[7],'_'),'[',1)
allele <- c(sapply(strsplit(allele_last,':'),'[',3),sapply(strsplit(allele_last,':'),'[',4))
alt_allele <- paste0(sapply(strsplit(names(gen)[7],'_'),'[',2),sapply(strsplit(names(gen)[7],'_'),'[',2))
ref_allele <- paste0(setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)),setdiff(allele,sapply(strsplit(names(gen)[7],'_'),'[',2)))
mix_allele <- paste0(substring(ref_allele,1,1),substring(alt_allele,1,1))
gen$rs_n <- ifelse(gen[,7]==0,paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), ifelse(gen[,7]==2, paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')'),paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')')))
gen$rs_n <- factor(gen$rs_n, levels = c(paste0(ref_allele,'\n(n=',sum(gen[,7]==0),')'), paste0(mix_allele,'\n(n=',sum(gen[,7]==1),')'), paste0(alt_allele,'\n(n=',sum(gen[,7]==2),')')))
gen <- dplyr::left_join(gen,exp,by = 'IID')
gen[,9] <- as.numeric(gen[,9])
p <- ggplot(aes(x=gen$rs_n,y=gen[,ncol(gen)],fill = rs_n),data=gen) + 
  geom_jitter(alpha=0.2,position=position_jitterdodge(jitter.width = 0.35, jitter.height = 0, dodge.width = 0.25))+
  geom_boxplot(alpha=0.4,width=0.2,position=position_dodge(width=0.2),size=0.75,outlier.colour = NA)+
  geom_violin(alpha=0.2,width=0.7,position=position_dodge(width=0.2),size=0.75)+
  scale_fill_manual(values = cbPalette)+
  theme_classic() + 
  theme(legend.position="none") + 
  theme(text = element_text(size=16)) + 
  ylab('Normalized gene expression') + xlab('rs3206947 genotype') + 
  theme(axis.text.y = element_text(size=16, color = "black"), 
        axis.text.x = element_text(size=16, color = "black"), 
        axis.title.y = element_text(size=16, color = "black"), 
        axis.title.x = element_text(size=16, color = "black"),
        plot.title = element_text(size=16, color = "black",hjust = 0.5,margin = margin(t = 0, r = 0, b = 1.5, l = 0,"cm"))) +
  theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  theme(plot.margin = margin(t=0.2, r=0, b=0.2, l=0, "cm"))+
  stat_summary(fun.y = "median", geom = "point", size = 1) + 
  stat_summary(fun.y = "median", geom = "line", size = 0.8, aes(group = 1),color="#7E6148B2") +labs(title = 'ERVK3-1')
ggsave('Figure 15E',p, width=5.8 ,height= 5.7)

15.8 FigS15F-left

cd /data1/qtl/code_for_submit/data/FigS15

R
options(stringsAsFactors=F)
library(data.table)
library(limma)
library(ggpubr)
library(patchwork) 
library(ggsci) 
library(tidyverse)
library(dplyr)
rm(list=ls())
dat_gene <- read.table('ERVK3-1_NSCLC_NJLCC.txt',row.names = 1,sep = '\t',check.names = F,header = T)
dat_gene <- as.data.frame(dat_gene)
dat_gene$type <- c(rep('Normal\n(N = 116)',116),rep('Tumor\n(N = 116)',116))
dat_gene[,1] <- log2(dat_gene[,1]+1)
colnames(dat_gene)=c("expression","Type")
dat_gene$Type=factor(dat_gene$Type)
dat_gene$gene_name <- 'ERVK3-1'
custom_palette <- c("#0072B5", "#BC3C29")  
p <- ggboxplot(dat_gene, x = "Type", y = "expression",color = "Type", palette = custom_palette,short.panel.labs = FALSE) +
  labs( x = NULL,  y = 'Expression of ERVK3-1') +
  theme(axis.text.y = element_text(size = 16), 
        axis.text.x = element_text(size = 16), 
        axis.title = element_text(size = 18), 
        strip.text = element_text(size = 20), 
        strip.background = element_rect(color = "black", fill = "lightgray", size = 1.5), 
        strip.border = element_rect(color = "black", size = 2), 
        panel.grid.major = element_blank(),  
        panel.grid.minor = element_blank(), 
        panel.border = element_rect(color = "black", fill = NA, size = 1.5),  
        legend.text = element_text(size = 16), 
        legend.title = element_text(size = 16)  ) +
  geom_boxplot(aes(color = Type), size = 1.5) +  
  geom_jitter(aes(color = Type), size = 5, width = 0.2, shape = 16,alpha = 0.5) + 
  stat_compare_means(method = "t.test",label = "p.format",size = 7, paired = TRUE) 
ggsave(p,filename ='Figure 15F-left.pdf',width = 6.5,height = 7)

15.9 FigS15F-right

cd /data1/qtl/code_for_submit/data/FigS15

R
rm(list = ls())
library(data.table)
library(limma)
library(ggpubr)
library(patchwork) 
library(ggsci) 
dat_gene <- read.table('ERVK3-1_NSCLC.txt',row.names = 1,sep = '\t',check.names = F,header = T)
type <- as.character(sapply(strsplit(rownames(dat_gene),"\\-"), "[", 4))
type <- sapply(strsplit(type,""), "[", 1)
type=gsub("2", "1", type)
type <- as.numeric(type)
dat_gene <- cbind(dat_gene,type=type)
dat_gene <- as.data.frame(dat_gene)
table(dat_gene$type)
dat_gene$type <- ifelse(dat_gene$type==0,'Tumor\n(N = 1037)','Normal\n(N = 108)')
dat_gene[,1] <- log2(dat_gene[,1]+1)
colnames(dat_gene)=c("expression","Type")
dat_gene$Type=factor(dat_gene$Type)
dat_gene$gene_name <- 'ERVK3-1'
custom_palette <- c("#0072B5", "#BC3C29")  
p <- ggboxplot(dat_gene, x = "Type", y = "expression",
               color = "Type", palette = custom_palette, 
               short.panel.labs = FALSE) +
  labs(x = NULL, y = 'Expression of ERVK3-1') +
  theme(axis.text.y = element_text(size = 16), 
        axis.text.x = element_text(size = 16), 
        axis.title = element_text(size = 18),
        strip.text = element_text(size = 20), 
        strip.background = element_rect(color = "black", fill = "lightgray", size = 1.5),  
        strip.border = element_rect(color = "black", size = 2),  
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),  
        panel.border = element_rect(color = "black", fill = NA, size = 1.5),  
        legend.text = element_text(size = 16),  
        legend.title = element_text(size = 16)) +
  geom_boxplot(aes(color = Type), size = 1.5) +  
  geom_jitter(aes(color = Type), size = 5, width = 0.2, shape = 16,alpha = 0.5) + 
  stat_compare_means(label = "p.format",size = 7,method = 'wilcox.test', digits = 3)
ggsave(p,filename = 'Figure 15F-right.pdf',width = 6.5,height = 7)


AI中组合图片并修改细节





Working directory: Server 39